Read in the data

library (readr)

# Raw CSV locations of the four source tables.
urlfile1 <- "https://raw.githubusercontent.com/jsegich/STA553/main/data/income_per_person.csv"
urlfile2 <- "https://raw.githubusercontent.com/jsegich/STA553/main/data/life_expectancy_years.csv"
urlfile3 <- "https://raw.githubusercontent.com/jsegich/STA553/main/data/population_total.csv"
urlfile4 <- "https://raw.githubusercontent.com/jsegich/STA553/main/data/countries_total.csv"

# Read each table through a url() connection.
income_per_person <- read_csv(url(urlfile1))
life_expectancy_years <- read_csv(url(urlfile2))
population_total <- read_csv(url(urlfile3))
countries_total <- read_csv(url(urlfile4))

Prepare a single data set based on data set 12

# Reshape the three wide tables into long format.
# In each table every column except `geo` is stacked: the former column names
# go into `Year` and the cell values go into the named value column. `geo` is
# kept as the identifier column (it is excluded from stacking, not dropped).
# Records with missing values are kept (values_drop_na = FALSE).
# pivot_longer() replaces the superseded gather(); the resulting rows are
# ordered by input row (country) instead of by stacked column.
income_per_person_fin <- income_per_person %>%
  pivot_longer(
    cols = -geo,
    names_to = "Year",
    values_to = "Income",
    values_drop_na = FALSE
  )

life_expectancy_years_fin <- life_expectancy_years %>%
  pivot_longer(
    cols = -geo,
    names_to = "Year",
    values_to = "Life Expectancy",
    values_drop_na = FALSE
  )

population_total_fin <- population_total %>%
  pivot_longer(
    cols = -geo,
    names_to = "Year",
    values_to = "Population",
    values_drop_na = FALSE
  )

# Join life expectancy and income per person.
# The two long tables share exactly the `geo` and `Year` columns, so the keys
# are spelled out instead of relying on an implicit natural join.
LifeExpIncom <- full_join(
  life_expectancy_years_fin,
  income_per_person_fin,
  by = c("geo", "Year")
)

# Store the year as its first 4 characters in a new `year` column and drop
# the original `Year` column.
LifeExpIncom <- LifeExpIncom %>%
  mutate(year = substr(Year, 1, 4)) %>%
  select(-Year)

# Attach the country information to the income / life expectancy records.
# `geo` is matched against `name` in countries_total; all = TRUE keeps
# unmatched rows from both sides (a full outer merge, despite the name).
innerjoin <- merge(
  LifeExpIncom,
  countries_total,
  by.x = "geo",
  by.y = "name",
  all = TRUE
)

## Final data set: natural full join on all columns the two tables share
Final_Set <- LifeExpIncom %>%
  full_join(innerjoin)

# Build a set that includes population information and region info.

# Rename Year to year so it matches the column name used in Final_Set.
population_total_fin <- rename(population_total_fin, year = "Year")

# Population records with country information attached
# (full outer merge of `geo` against `name` in countries_total).
innerjoin2 <- merge(
  x = population_total_fin,
  y = countries_total,
  by.x = "geo",
  by.y = "name",
  all = TRUE
)

# Join the sets.
# by.x / by.y / all are merge() arguments that full_join() does not define,
# so they were never used as join keys. The join is made on every column the
# two tables have in common, which is what a full_join() without `by` does;
# the keys are now passed explicitly.
Fin <- full_join(
  x = Final_Set,
  y = innerjoin2,
  by = intersect(names(Final_Set), names(innerjoin2))
)

# Keep only the 2015 records of the combined set and of the population table.
Sub_2015 <- subset(Final_Set, year == "2015")
Sub_2015.population <- subset(population_total_fin, year == "2015")

# Give the life expectancy column a name without a space.
Sub_2015 <- Sub_2015 %>%
  rename(Life.Expectancy = "Life Expectancy")
Final_Set <- Final_Set %>%
  rename(Life.Expectancy = "Life Expectancy")

# Add the population totals: inner merge on `geo` (all = FALSE keeps only
# countries present in both tables).
Final_Sub_2015 <- merge(
  Sub_2015,
  Sub_2015.population,
  by = "geo",
  all = FALSE
)

Here we create a scatter plot for data from 2015

# Scatter plot of the 2015 data: life expectancy on x, income on y, one marker
# per row, sized by the square root of the population and coloured by country.
# All mappings are formulas evaluated against the piped data frame instead of
# `$`-qualified vectors, so they always refer to the data being plotted.
fig <- Final_Sub_2015 %>%
  plot_ly(
    type = "scatter",
    mode = "markers",
    x = ~Life.Expectancy,
    y = ~Income,
    size = ~sqrt(Population),
    sizes = c(5, 20),
    color = ~geo,
    alpha = 0.5,
    # hover text shown for each marker
    text = ~paste("Population:", Population, "<br>Country:", geo),
    marker = list(
      symbol = "circle",
      sizemode = "diameter",
      line = list(width = 2)
    )
  )
 
# Add the plot title and axis titles to the 2015 scatter plot.
# The title spelling is corrected ("Expectancy") so it matches the title used
# for the animated plot.
fig %>%
  layout(
    title = list(text = "Life Expectancy vs. Income"),
    xaxis = list(title = list(text = "Life Expectancy")),
    yaxis = list(title = list(text = "Income in USD"))
  )

Here we create an animated plot

# Add a numeric copy of the year, then keep only the variables of interest.
Finy <- Fin %>%
  mutate(Num_year = as.numeric(year))

Fin <- Finy[, c("Life Expectancy", "Income", "region", "Population", "Num_year")]

# Drop every row that has a missing value in any of the kept columns.
Fin <- na.omit(Fin)

# Animated bubble chart: life expectancy vs. income (log10 y axis), point size
# by population, colour by region, animated over the numeric year.
# Columns are referenced by bare name inside aes() and transition_time()
# rather than through `Fin$...`, so the mappings follow the plot data.
# The `ids` mapping was removed because it is not a geom_point() aesthetic,
# and the size mapping is declared once in the top-level aes().
p <- ggplot(
  data = Fin,
  aes(
    x = `Life Expectancy`,
    y = Income,
    size = Population,
    colour = region
  )
) +
  geom_point(show.legend = TRUE, alpha = 0.4) +
  scale_size(range = c(2, 12)) +
  scale_y_log10() +
  labs(
    x = "Life Expectancy",
    y = "Income in USD",
    title = "                              Life Expectancy vs. Income"
  ) +
  ## gganimate command
  transition_time(Num_year)

# Render the animation with the gifski renderer.
animate(p, renderer = gifski_renderer())

Read in the data

# Location of the gas station CSV (this reuses the urlfile1 name).
urlfile1 <- "https://raw.githubusercontent.com/jsegich/STA553/main/data/POC.csv"

# Read the table through a url() connection.
gas_stations <- read_csv(url(urlfile1))

# Preview the first rows.
head(gas_stations)
## # A tibble: 6 x 32
##      X1 site_row_id STATE county  ADDRESS  CITY  ycoord xcoord SITE_DESCRIPTION 
##   <dbl> <chr>       <chr> <chr>   <chr>    <chr>  <dbl>  <dbl> <chr>            
## 1     1 1-3R8J-494  CA    Los An… 37120 4… PALM…   34.6  -118. Los Angeles-Long…
## 2     2 1-3R8J-362  WA    Frankl… 1212 N … PASCO   46.2  -119. Kennewick-Pasco-…
## 3     3 1-3R8J-199  NV    Washoe  99 DAMO… RENO    39.4  -120. Reno-Sparks NV   
## 4     4 1-3R8J-261  UT    Salt L… 5404 S … SALT…   40.7  -112. Salt Lake City UT
## 5     5 1-3R8J-493  CA    Los An… 1731 E … LANC…   34.7  -118. Los Angeles-Long…
## 6     6 1-3R8J-508  WA    Benton  2707 S … KENN…   46.2  -119. Kennewick-Pasco-…
## # … with 23 more variables: service_or_fuel <chr>, diesel <chr>,
## #   twentyfour_hour_flag <chr>, car_wash <chr>, truckstop_flag <chr>,
## #   description <chr>, PUMP_TECH <chr>, POC <dbl>, HIFCA <dbl>, ZIPnew <dbl>,
## #   POCAGE <dbl>, POCGAP <dbl>, ZIPPOC <dbl>, HFG <dbl>, MSA <dbl>,
## #   dist.to.poc <dbl>, cate.poc.density <chr>, cate.poc.age <chr>,
## #   cate.poc.age.20 <chr>, cate.poc.intensity <chr>,
## #   cate.poc.intensity.tot <chr>, MSA_POC <dbl>, MSA_POC.1 <dbl>

Create a random sample of 500 gas stations and provide the requested information for each

library(leaflet)

# Draw 500 rows at random from the gas station table.
gas_sub <- gas_stations %>%
  sample_n(500)

# Show the structure of the sample.
str(gas_sub)
## spec_tbl_df [500 × 32] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ X1                    : num [1:500] 27190 1628 46878 44830 15993 ...
##  $ site_row_id           : chr [1:500] "1-BZSA-45" "1-3UVO-2155" "1-14W9NQO" "1-10AGCZ5" ...
##  $ STATE                 : chr [1:500] "OH" "VA" "MD" "TN" ...
##  $ county                : chr [1:500] "Franklin" "Pulaski" "Baltimore" "Gibson" ...
##  $ ADDRESS               : chr [1:500] "1043 W BRD ST" "2471 LOWMANS FERRY RD R" "4501 HOLLINS FERRY RD" "67 GREENFIELD HWY" ...
##  $ CITY                  : chr [1:500] "COLUMBUS" "PULASKI" "HALETHORPE" "BRADFORD" ...
##  $ ycoord                : num [1:500] 40 37 39.2 36.1 34.1 ...
##  $ xcoord                : num [1:500] -83 -80.7 -76.7 -88.8 -116.3 ...
##  $ SITE_DESCRIPTION      : chr [1:500] "Columbus OH" "RURAL" "Baltimore-Towson MD" "RURAL" ...
##  $ service_or_fuel       : chr [1:500] "Fuel" "Fuel" "Fuel" "Fuel" ...
##  $ diesel                : chr [1:500] "Y" "Y" "Y" "Y" ...
##  $ twentyfour_hour_flag  : chr [1:500] "Y" "N" "Y" "Y" ...
##  $ car_wash              : chr [1:500] "N" "N" "N" "N" ...
##  $ truckstop_flag        : chr [1:500] "N" "N" "Y" "N" ...
##  $ description           : chr [1:500] "URBAN" "RURAL" "URBAN" "RURAL" ...
##  $ PUMP_TECH             : chr [1:500] "O" "O" "O" "O" ...
##  $ POC                   : num [1:500] 0 0 0 0 0 0 0 0 0 0 ...
##  $ HIFCA                 : num [1:500] 1 1 1 0 1 0 0 0 1 1 ...
##  $ ZIPnew                : num [1:500] 43222 24301 21227 38316 92252 ...
##  $ POCAGE                : num [1:500] NA NA NA NA NA NA NA NA NA NA ...
##  $ POCGAP                : num [1:500] NA NA NA NA NA NA NA NA NA NA ...
##  $ ZIPPOC                : num [1:500] 0 0 0 0 0 0 0 1 0 0 ...
##  $ HFG                   : num [1:500] 0 0 0 0 1 0 0 0 0 0 ...
##  $ MSA                   : num [1:500] 1840 0 720 0 6780 0 3810 0 6640 1920 ...
##  $ dist.to.poc           : num [1:500] 7.93 18.52 11.65 20.63 39.81 ...
##  $ cate.poc.density      : chr [1:500] "(1,5]" "(-1e-06,1]" "(1,5]" "(-1e-06,1]" ...
##  $ cate.poc.age          : chr [1:500] "(0,15]" "(140,Inf]" "(15,140]" "(140,Inf]" ...
##  $ cate.poc.age.20       : chr [1:500] "(0,15]" "(15,140]" "(15,140]" "(140,Inf]" ...
##  $ cate.poc.intensity    : chr [1:500] "(0,5]" "(-0.0001,0]" "(5,Inf]" "(-0.0001,0]" ...
##  $ cate.poc.intensity.tot: chr [1:500] "(0,8]" "(-0.0001,0]" "(8,Inf]" "(-0.0001,0]" ...
##  $ MSA_POC               : num [1:500] 0 0 1 0 0 0 0 0 0 1 ...
##  $ MSA_POC.1             : num [1:500] 0 0 1 0 0 0 0 0 0 1 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   X1 = col_double(),
##   ..   site_row_id = col_character(),
##   ..   STATE = col_character(),
##   ..   county = col_character(),
##   ..   ADDRESS = col_character(),
##   ..   CITY = col_character(),
##   ..   ycoord = col_double(),
##   ..   xcoord = col_double(),
##   ..   SITE_DESCRIPTION = col_character(),
##   ..   service_or_fuel = col_character(),
##   ..   diesel = col_character(),
##   ..   twentyfour_hour_flag = col_character(),
##   ..   car_wash = col_character(),
##   ..   truckstop_flag = col_character(),
##   ..   description = col_character(),
##   ..   PUMP_TECH = col_character(),
##   ..   POC = col_double(),
##   ..   HIFCA = col_double(),
##   ..   ZIPnew = col_double(),
##   ..   POCAGE = col_double(),
##   ..   POCGAP = col_double(),
##   ..   ZIPPOC = col_double(),
##   ..   HFG = col_double(),
##   ..   MSA = col_double(),
##   ..   dist.to.poc = col_double(),
##   ..   cate.poc.density = col_character(),
##   ..   cate.poc.age = col_character(),
##   ..   cate.poc.age.20 = col_character(),
##   ..   cate.poc.intensity = col_character(),
##   ..   cate.poc.intensity.tot = col_character(),
##   ..   MSA_POC = col_double(),
##   ..   MSA_POC.1 = col_double()
##   .. )
# Label text for each sampled station: state, zip code and county,
# joined with single spaces.
label.msg <- paste(
  "State:", gas_sub$STATE,
  "Zip:", gas_sub$ZIPnew,
  "\n County:", gas_sub$county,
  "\n"
)

# Custom 60 x 60 marker icon loaded from the course repository.
redicon <- makeIcon(
  iconUrl = "https://raw.githubusercontent.com/jsegich/STA553/main/data/0f61ba72e0e12ba59d30a50295964871.png?raw=true",
  iconWidth = 60,
  iconHeight = 60
)
# define a leaflet map
# The view is centred on the mean coordinates of the sampled stations;
# na.rm = TRUE keeps the centre defined even if a coordinate is missing.
# Marker positions are given as column formulas evaluated against gas_sub.
leaflet(gas_sub) %>%
  addTiles() %>%
  setView(
    lng = mean(gas_sub$xcoord, na.rm = TRUE),
    lat = mean(gas_sub$ycoord, na.rm = TRUE),
    zoom = 4
  ) %>%
  addMarkers(
    lng = ~xcoord,
    lat = ~ycoord,
    label = ~label.msg,
    icon = redicon
  )